package com.nb.crawler.tmall;

import com.nb.crawler.Crawler;
import com.nb.crawler.html.Element;
import com.nb.crawler.html.HtmlParser;
import com.nb.crawler.html.XPathCallback;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import org.htmlcleaner.XPatherException;

import java.io.IOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * Crawler for the New Balance store listing on Tmall.
 *
 * <p>Parses the category listing page at {@code LIST_URL}, walks the elements matched by the
 * XPath {@code //dl}, and submits one {@link TMallTask} per extracted link to a fixed-size
 * thread pool.
 *
 * <p>Created by cuibo on 15/1/13.
 */
public class TMallCrawler implements Crawler {
    /** Category listing page that seeds the crawl; the {@code keyword} value is already percent-encoded. */
    private static final String LIST_URL = "http://newbalance.tmall.com/category.htm?orderType=&viewType=grid&keyword=%D0%AC&lowPrice=&highPrice=";

    /**
     * Worker pool (10 threads) that runs the submitted {@link TMallTask}s.
     *
     * <p>NOTE(review): nothing in this class shuts the pool down, so its threads outlive
     * {@link #crawl()}. Confirm whether the owner of this crawler is expected to manage that.
     */
    private final ExecutorService pool = Executors.newFixedThreadPool(10);

    /**
     * Parses the listing page and queues a {@link TMallTask} for every link found.
     *
     * <p>Tasks are only submitted here; this method does not wait for them to complete.
     * Entries for which no usable {@code href} can be extracted are skipped instead of being
     * queued as tasks without a URL.
     */
    @Override
    public void crawl() {
        Element doc = HtmlParser.parse(LIST_URL);

        doc.each("//dl", new XPathCallback<Object>() {

            @Override
            public Object call(Element element, String xpath) {
                String href = element.getAttributeByXPath("/dd[@class='detail']/a", "href");

                // A <dl> without a detail link yields no URL; do not queue a task for it.
                if (href == null || href.trim().isEmpty()) {
                    return null;
                }

                pool.submit(new TMallTask(href));

                // The callback result is not used by this crawler.
                return null;
            }
        });
    }
}
